---
title: "final_estimations"
author: "Matthew Gordon"
date: "2/10/2021"
output: latex_document
header-includes:

  \usepackage{pdflscape}
  \newcommand{\blandscape}{\begin{landscape}}
  \newcommand{\elandscape}{\end{landscape}}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
rootdir <- dirname(getwd())
```

Load packages and data - create functions

```{r, message=FALSE}
library(tidyverse)
library(tidycensus)
library(AER)
library(lfe)
library(ivmodel)
library(quantreg)
library(sandwich)
library(boot)
library(fixest)
library(stargazer)
library(grid)
library(gridExtra)
library(ggstance)

df.master <- read_csv(paste(rootdir, "/data/master.csv", sep = ""), 
                      na=c("",".","NA")) %>%
        mutate(adj_population = tot_population*(1+device_chg1020),
               adjdeathrate = cumulative_total_deaths*100000/adj_population,
               adjhosprate = cumulative_hospitalized*100000/adj_population)

df.str_race <- read_csv(paste(rootdir, "/data/race_stratified.csv", sep = ""), 
                      na=c("",".","NA"))

df.str_age <- read_csv(paste(rootdir, "/data/age_stratified.csv", sep = ""), 
                      na=c("",".","NA"))

df.str <- read_csv(paste(rootdir, "/data/master_stratified.csv", sep = ""), 
                      na=c("",".","NA"))

indvars <- c("pm_idw", "no2_idw", "no_idw", "pm_closest", "no2_closest", "no_closest", 
             "pm_idw_timewts", "no2_idw_timewts", "no_idw_timewts", "pm_closest_sameside", "no2_closest_sameside", "no_closest_sameside")
rests <- c(0, 2000, 1000, 500)
geogs <- c("all", "OB")
depvars <- c("cumulative_total_deaths", "cumulative_hospitalized")

set.seed(13)
```

## Define functions

```{r}
## test vars 
#depvar <- "cumulative_total_deaths"
#ind_var <- "pm_idw"
#geography <- "OB"
#restriction <- 1000

robustses <- function(x) {
        sqrt(diag(vcovHC(x, type = "HC1")))
}

createdf <- function(geography, restriction, race, age) {
        if (!missing(age) & !missing(race)) {
                df <- df.str[df.str$raceethnicity==race & df.str$agegroup==age,]
        } else if (missing(race)) {
                if (missing(age)) {
                        df <- df.master
                } else df <- filter(df.str_age, agegroup==age)
        } else df <- filter(df.str_race, raceethnicity==race)
        
        if (geography=="all") {
                df <- df
        } else if (geography=="Man") {
                df <- filter(df, below_110th==1)
        } else {
                df <- filter(df, below_110th==0)
        }
        
        if (restriction==0) {
                df <- df
                ivs_raw <- "downwind_5_r + downwind_3_r + downwind_2_r + downwind_1_r + downwind_0_r"
        } else {
                df <- filter(df, near_dist < restriction)
                if (restriction == 2000) {
                        ivs_raw <- "downwind_2_r + downwind_1_r + downwind_0_r"
                } else if (restriction == 1000) {
                        ivs_raw <- "downwind_1_r + downwind_0_r"
                } else {
                        ivs_raw <- "downwind_0_r"
                }
        }
        
        df.i <- select(df, downwind_5_r, downwind_3_r, downwind_2_r, downwind_1_r, downwind_0_r,
                          pm_idw, no_idw, no2_idw, pm_closest, no_closest, no2_closest)
        df <- df[complete.cases(df.i),]
        return(list(df, ivs_raw))
}


ols_estimation <- function(depvar, ind_var, geography, restriction){
        df <- createdf(geography, restriction)[[1]]
        olsregp <- lm(as.formula(paste("log(", depvar, " + 1) ~", ind_var,
                                              "+ near_dist + as.factor(puma) + as.factor(station)")),
                        data = df, subset = tot_population > 500 & near_dist>50)
        return(olsregp)
}

ols_estimation_w_cont <- function(depvar, ind_var, geography, restriction, controls, weights = NULL){
        df <- createdf(geography, restriction)[[1]]
        df <- mutate(df, deprate = get(depvar))
        if (is.null(weights)) w = rep(1, nrow(df)) else if (weights=="tot") w = df$tot_population else w = df$adj_population
        olsregp <- lm(as.formula(paste("deprate ~", ind_var,
                                              "+ near_dist + as.factor(puma) + as.factor(station)", controls)),
                        data = df, subset = tot_population > 500 & near_dist>50, weights = w)
        return(olsregp)
}

dy.dx.ame <- function(formula, data, indices, delta, ind_var){
        dfi <- data[indices, ]
        naiveregfep <- ivreg(formula, data = dfi)
        b1 <- naiveregfep$coefficients[2]
        yhats <- naiveregfep$fitted.values
        ehats <- naiveregfep$residuals
        dat2 <- dfi
        dat2[, ind_var] <- unlist(dat2[, ind_var]) + delta
        yhatdelta <- predict(naiveregfep, newdata = dat2)
        dy.dxs <- exp(yhatdelta)*mean(exp(ehats)) - exp(yhats)*mean(exp(ehats))
        return(mean(dy.dxs))
}

iv_estimation <- function(depvar, ind_var, geography, restriction, age, race, boot = TRUE){
        if (!missing(age) & !missing(race)) {
                cdf <- createdf(geography, restriction, race = race, age = age)
        } else if (missing(age)) {
                        if (missing(race)) {
                                cdf <- createdf(geography, restriction) 
                        } else cdf <- createdf(geography, restriction, race = race)
                } else cdf <- createdf(geography, restriction, age = age)
        
        df <- cdf[[1]]
        ivs_raw <- cdf[[2]]
        
        formula <- as.formula(paste("log(", depvar, " + 1) ~", ind_var, "+ near_dist + as.factor(puma) + as.factor(station) |",
                                ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)"))
        
        df <- if(missing(age) & missing(race)){filter(df, tot_population > 500 & near_dist>=50)} else if (!missing(age) & !missing(race)) {
                        filter(df, population > 50 & near_dist>=50)
                } else filter(df, population > 200 & near_dist>=50)
        wts <- if (grepl("adj", depvar)) {df$adj_population}  else if (grepl("rate", depvar)) {df$tot_population} else {rep(1, nrow(df))}
        
        naiveregfep <- ivreg(formula,
                        data = df, weights = wts)
        if (boot) {
                if (grepl("rate", depvar)) {return(naiveregfep)}  else {
                        dfb <- filter(df, near_dist>=50)
                        delta <- if (ind_var=="pm_idw") {.16} else if (ind_var=="no2_idw") {.38} else {.73}
                        naivemfxboot <- boot(data = dfb, statistic = dy.dx.ame, formula = formula, delta = delta, ind_var = ind_var, 
                                             R = 1000, parallel = "multicore", ncpus = 6)
                        naivemfxbootci <- boot.ci(naivemfxboot, type = "perc")
        return(list(naiveregfep, naivemfxboot, naivemfxbootci)) }
        } else return(naiveregfep)
}

poisregc <- function(depvar, ind_var, ivs_raw, data, indices){
        dfi <- data[indices, ]
        wts <- if (grepl("adj", depvar)) {dfi$adj_population}  else if (grepl("rate", depvar)) {dfi$tot_population} else {rep(1, nrow(dfi))}
        npstg1 <- lm(as.formula(paste(ind_var, "~", ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)")),
                            data = dfi, weights = wts)
        dfi$stg1e <- npstg1$residuals
        poisreg <- fepois(as.formula(paste(depvar, "~", ind_var, "+ stg1e + near_dist + as.factor(station) | as.factor(puma)")),
                            data = dfi, warn = FALSE, notes = FALSE, weights = wts)
        return(as.numeric(poisreg$coefficients[1]))
}

poismfx <- function(depvar, ind_var, ivs_raw, data, indices, delta){
        dfi <- data[indices, ]
        wts <- if (grepl("adj", depvar)) {dfi$adj_population}  else if (grepl("rate", depvar)) {dfi$tot_population} else {rep(1, nrow(dfi))}
        npstg1 <- lm(as.formula(paste(ind_var, "~", ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)")),
                            data = dfi, weights = wts)
        dfi$stg1e <- npstg1$residuals
        poisreg <- fepois(as.formula(paste(depvar, "~", ind_var, "+ stg1e + near_dist + as.factor(station) | as.factor(puma)")),
                            data = dfi, warn = FALSE, notes = FALSE, weights = wts)
        dat2 <- dfi
        dat2[, ind_var] <- unlist(dat2[, ind_var]) + delta
        yhatdelta <- predict(poisreg, newdata = dat2)
        dydx <- mean(yhatdelta - poisreg$fitted.values)
        return(dydx)
}

pois_estimation <- function(depvar, ind_var, geography, restriction){
        cdf <- createdf(geography, restriction)
        df <- cdf[[1]]
        ivs_raw <- cdf[[2]]
        dfb <- filter(df, tot_population > 500 & near_dist>=50)
        delta <- if (ind_var=="pm_idw") {.16} else if (ind_var=="no2_idw") {.38} else {.73}
        
        poisboot <- boot(data = dfb, statistic = poisregc, R = 1000, depvar = depvar, ind_var = ind_var, ivs_raw = ivs_raw,
                         parallel = "multicore", ncpus = 6)
        poisci <- boot.ci(poisboot, type = "perc")
        poisbootmfx <- boot(data = dfb, statistic = poismfx, R = 1000, depvar = depvar, ind_var = ind_var, ivs_raw = ivs_raw, delta = delta, 
                            parallel = "multicore", ncpus = 6)
        poiscimfx <- boot.ci(poisbootmfx, type = "perc")
        return(list(poisboot, poisci, poiscimfx))
}

robustimations <- function(depvar, ind_var, geography, restriction, controls, ql, qh){
        cdf <- createdf(geography, restriction)
        df <- cdf[[1]]
        ivs_raw <- cdf[[2]]
        
        if (ind_var=="pm_idw") { 
                closevar <- "pm_closest"} else if (ind_var=="no_idw"){
                        closevar <- "no_closest"} else closevar <- "no2_closest"
        
        ### naive iv - closest monitor
        
        naiveregfepcm <- ivreg(as.formula(paste("log(", depvar, " + 1) ~", closevar, 
                                                  "+ near_dist + as.factor(puma) + as.factor(station) |",
                                ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)")),
                        data = df, subset = tot_population > 500 & near_dist>=50)
        
        ### naive iv - pop threshold 100
        
        naiveregfepw <- ivreg(as.formula(paste("log(", depvar, " + 1) ~", ind_var, 
                                                  "+ near_dist + as.factor(puma) + as.factor(station) |",
                                ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)")),
                        data = df, subset = tot_population > 100 & near_dist>=50)
        
        ### naive iv - trim top ql% and bottom qh% by population
        qh <- quantile(df$tot_population, probs = 1-qh)
        ql <- quantile(df$tot_population, probs = ql)
        
        naiveregfepw2 <- ivreg(as.formula(paste("log(", depvar, " + 1) ~", ind_var, 
                                                  "+ near_dist + as.factor(puma) + as.factor(station) |",
                                ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)")),
                        data = df, subset = tot_population > ql & tot_population < qh & near_dist>=50)
        
        ### naive iv - 200m threshold
        
        cdf2 <- createdf(geography, 2000)
        df2 <- cdf2[[1]]
        ivs_raw2 <- cdf2[[2]]
        
        naiveregfept <- ivreg(as.formula(paste("log(", depvar, " + 1) ~", ind_var, 
                                                  "+ near_dist + as.factor(puma) + as.factor(station) |",
                                ivs_raw2, "+ near_dist + as.factor(puma) + as.factor(station)")),
                        data = df2, subset = tot_population > 500 & near_dist > 200)
        
        ### naive iv - controls
        
        naiveregfepc <- ivreg(as.formula(paste("log(", depvar, " + 1) ~", ind_var, controls,
                                                  "+ near_dist + as.factor(puma) + as.factor(station) |",
                                ivs_raw, controls, "+ near_dist + as.factor(puma) + as.factor(station)")),
                        data = df, subset = tot_population > 500 & near_dist>=50)
        
        return(list(naiveregfepw, naiveregfepw2, naiveregfept, naiveregfepc, naiveregfepcm))
}

```

### Estimation - Final tables

## First stage 

Normal IDW

```{r, results="asis"}
mainsubset <- df.master$tot_population > 500 & df.master$near_dist>=50

pvars <- c("pm_idw", "no_idw", "no2_idw")

mods <- rep(list(NA), 12)

for (i in c(1:3)) {
        mods[[i]] <- felm(get(pvars[i]) ~ downwind_0_r + near_dist | 
                        as.factor(puma) + as.factor(station),
                data = df.master, subset = mainsubset & near_dist < 500)

        mods[[i+3]] <- felm(get(pvars[i]) ~ downwind_0_r + near_dist | 
                                as.factor(puma) + as.factor(station),
                        data = df.master, subset = mainsubset & near_dist < 500 & below_110th==0)
        
        mods[[i+6]] <- felm(get(pvars[i]) ~ downwind_0_rr + near_dist | 
                                as.factor(puma) + as.factor(station),
                        data = df.master, subset = mainsubset & near_dist < 500)
        
        mods[[i+9]] <- felm(get(pvars[i]) ~ downwind_0_rr + near_dist | 
                                as.factor(puma) + as.factor(station),
                        data = df.master, subset = mainsubset & near_dist < 500 & below_110th==0)
}

stargazer(mods, type = "text", style = "QJE", df = FALSE, column.labels = rep(c("PM2.5", "NO", "NO2"),4),
          omit = c("puma", "station", "near_dist"), omit.stat = c("ser"), keep.stat = c("n"), digits = 2,
          se = lapply(mods, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          add.lines = list(c(rep("", 10)),
                           c("Geography", rep(c(rep("Citywide", 3), rep("Outer Borough", 3)), 2)))
          )
```

Based on closest monitor

```{r, results="asis"}
mainsubset <- df.master$tot_population > 500 & df.master$near_dist>=50

pvars <- c("pm_closest", "no_closest", "no2_closest")

mods <- rep(list(NA), 12)

for (i in c(1:3)) {
        mods[[i]] <- felm(get(pvars[i]) ~ downwind_0_r + near_dist | 
                        as.factor(puma) + as.factor(station),
                data = df.master, subset = mainsubset & near_dist < 500)

        mods[[i+3]] <- felm(get(pvars[i]) ~ downwind_0_r + near_dist | 
                                as.factor(puma) + as.factor(station),
                        data = df.master, subset = mainsubset & near_dist < 500 & below_110th==0)
        
        mods[[i+6]] <- felm(get(pvars[i]) ~ downwind_1_r + downwind_0_r + near_dist | 
                                as.factor(puma) + as.factor(station),
                        data = df.master, subset = mainsubset & near_dist < 1000)
        
        mods[[i+9]] <- felm(get(pvars[i]) ~ downwind_1_r + downwind_0_r + near_dist | 
                                as.factor(puma) + as.factor(station),
                        data = df.master, subset = mainsubset & near_dist < 1000 & below_110th==0)
}

stargazer(mods, type = "text", style = "QJE", df = FALSE, column.labels = rep(c("PM2.5", "NO", "NO2"),4),
          omit = c("puma", "station", "near_dist"), omit.stat = c("ser"), keep.stat = c("n"), digits = 2,
          se = lapply(mods, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          add.lines = list(c(rep("", 10)),
                           c("Geography", rep(c(rep("Citywide", 3), rep("Outer Borough", 3)), 2)))
          )

```

## Rate vs Count with different measures of pollution

```{r, results="asis"}
### Main specifications - tot pop > 500, near_dist < 500, drop near_dist<50, all depvars log + 1
mods1 <- rep(list(NA), 72)
q <- 1

for (k in c("cumulative_total_deaths", "death_rate", "adjdeathrate")) {
        for (i in indvars) {
                for (j in geogs[1:2]) {
                        mods1[[q]] <- iv_estimation(k, i, j, 500, boot = FALSE)
                        q <- q+1
                }
        }
}

se_iv <- lapply(mods1, robustses)

stargazer(mods1[c(1:12)], se = se_iv[c(1:12)], type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - log-linear (Main Specifications)",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 6))))

stargazer(mods1[c(13:24)], se = se_iv[c(13:24)], type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - log-linear (Main Specifications)",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 6))))

stargazer(mods1[c(25:36)], se = se_iv[c(25:36)], type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths Rates - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 6))))

stargazer(mods1[c(37:48)], se = se_iv[c(37:48)], type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths Rates - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 6))))

stargazer(mods1[c(49:60)], se = se_iv[c(49:60)], type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Safegraph adjusted Death Rates - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 6))))

stargazer(mods1[c(61:72)], se = se_iv[c(61:72)], type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Safegraph adjusted Death Rates - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 6))))
```

## Effects on Deaths

```{r, results="asis", message = FALSE}
## IV reg

deathregs <- rep(list(NA), 6)
se_iv <- rep(list(NA), 6)
df.wi <- filter(df.master, tot_population > 500 & near_dist>=50 & near_dist < 500)
wicis <- rep(list(NA), 6)

q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                deathregs[[q]] <- iv_estimation("cumulative_total_deaths", i, j, 500)
                se_iv[[q]] <- sqrt(diag(vcovHC(deathregs[[q]][[1]], type = "HC1")))
                
                ## Weak Instrument CIs
                if (j == "OB") {
                        df.wi2 <- filter(df.wi, below_110th==0)
                } else if (j == "Man") {
                        df.wi2 <- filter(df.wi, below_110th==1)
                } else df.wi2 <- df.wi
                
                Y <- df.wi2[,c("cumulative_total_deaths")]
                Y <- as.matrix(log(Y + 1))
                D <- as.matrix(df.wi2[,c(i)])
                Z <- as.matrix(df.wi2[,c("downwind_1_r", "downwind_0_r")])
                X <- df.wi2[,c("near_dist", "puma", "station")]
                X$puma <- as.factor(X$puma)
                X$station <- as.factor(X$station)
                X <- model.matrix(~.+0, data = X)
                
                wireg <- ivmodel(Y, D, Z, X, heteroSE = TRUE, manyweakSE = TRUE, intercept = FALSE)
                wicis[[q]] <- wireg$CLR$ci

                q <- q+1
        }
}

stargazer(lapply(deathregs, function(x) x[[1]]), se = se_iv, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear (Main Specifications)",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Robust CIs", unlist(lapply(wicis, function(x) paste("(", round(x[1], 2), ", ", round(x[2], 2), ")", sep = "")))),
                           c(rep("", 7)),
                           c("AME", unlist(lapply(deathregs, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(deathregs, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c("F statistic", unlist(lapply(deathregs, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(deathregs, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))),
                           c(rep("", 7)))
          )

## OLS

deathregs_o <- rep(list(NA), 6)
se_o <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                deathregs_o[[q]] <- ols_estimation("cumulative_total_deaths", i, j, 500)
                se_o[[q]] <- sqrt(diag(vcovHC(deathregs_o[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(deathregs_o, se = se_o, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - OLS",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

## OLS w controls and rate depvar
controls <- " + tot_population + white + black + hispanic + tot_age65_74 + 
                tot_age75up + tot_ed_hs + tot_ed_none + tot_inc_percap  +
                tot_owner_occupied + tot_public_assistance"

deathregs_oc <- rep(list(NA), 6)
se_oc <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                deathregs_oc[[q]] <- ols_estimation_w_cont("cumulative_total_deaths", i, j, 500, controls)
                se_oc[[q]] <- sqrt(diag(vcovHC(deathregs_oc[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(deathregs_oc, se = se_oc, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Death Rate - OLS",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "Population", "White", "Black/African American", "Hispanic/Latino",
                               "Age 65-74", "Age 75+", "High School Education", "Less than HS education", "Income per capita", "Owner Occupied",
                               "Receiving Public Assistance"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

## Poisson

deathregs_p <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                deathregs_p[[q]] <- pois_estimation("cumulative_total_deaths", i, j, 500)
                q <- q+1
        }
}

stargazer(deathregs_o, ci = TRUE, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(deathregs_p, function(x) c(1, x[[1]]$t0, rep(1, 57))),
          ci.custom = lapply(deathregs_p, function(x) cbind(c(0, x[[2]]$percent[4], rep(0,57)),
                                                            c(1, x[[2]]$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"), 
          add.lines = list(c("AME", unlist(lapply(deathregs_p, function(x) round(x[[3]]$t0, 2)))),
                           c("", unlist(lapply(deathregs_p, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )
```

## Effects on Hospitalizations

```{r, results="asis"}
## IV regs

hospregs <- rep(list(NA), 6)
se_ivh <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                hospregs[[q]] <- iv_estimation("cumulative_hospitalized", i, j, 500)
                se_ivh[[q]] <- sqrt(diag(vcovHC(hospregs[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(hospregs, function(x) x[[1]]), se = se_ivh, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Hospitalizations - log-linear (Main Specifications)",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("AME", unlist(lapply(hospregs, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(hospregs, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c("F statistic", unlist(lapply(deathregs, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(deathregs, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))),
                           c(rep("", 7)))
          )

## OLS

hospregs_o <- rep(list(NA), 6)
se_oh <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                hospregs_o[[q]] <- ols_estimation("cumulative_hospitalized", i, j, 500)
                se_oh[[q]] <- sqrt(diag(vcovHC(hospregs_o[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(hospregs_o, se = se_oh, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - OLS",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

## OLS w controls 
hospregs_oc <- rep(list(NA), 6)
se_och <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                hospregs_oc[[q]] <- ols_estimation_w_cont("cumulative_hospitalized", i, j, 500, controls)
                se_och[[q]] <- sqrt(diag(vcovHC(hospregs_oc[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(hospregs_oc, se = se_och, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - OLS",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "Population", "White", "Black/African American", "Hispanic/Latino",
                               "Age 65-74", "Age 75+", "High School Education", "Less than HS education", "Income per capita", "Owner Occupied",
                               "Receiving Public Assistance"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

## Poisson

hospregs_p <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                hospregs_p[[q]] <- pois_estimation("cumulative_hospitalized", i, j, 500)
                q <- q+1
        }
}

stargazer(hospregs_o, ci = TRUE, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(hospregs_p, function(x) c(1, x[[1]]$t0, rep(1, 57))),
          ci.custom = lapply(hospregs_p, function(x) cbind(c(0, x[[2]]$percent[4], rep(0,57)),
                                                            c(1, x[[2]]$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"), 
          add.lines = list(c("AME", unlist(lapply(hospregs_p, function(x) round(x[[3]]$t0, 2)))),
                           c("", unlist(lapply(hospregs_p, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )
```

## Near Distance - Deaths

```{r, results="asis"}
## PM

deathregsr <- rep(list(NA), 8)
se_ivr <- rep(list(NA), 8)
q <- 1

for (j in geogs) {
        for (r in c(0, 2000, 1000, 500)) {
                deathregsr[[q]] <- iv_estimation("cumulative_total_deaths", "pm_idw", j, r)
                se_ivr[[q]] <- sqrt(diag(vcovHC(deathregsr[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(deathregsr, function(x) x[[1]]), se = se_ivr, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}"),
          add.lines = list(c("AME", unlist(lapply(deathregsr, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(deathregsr, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 9)),
                           c("Geography", rep("All", 4), rep("Outer-Borough", 4)),
                           c("Max distance to closest highway", rep(c("All", "2km", "1km", "0.5km"), 3)),
                           c(rep("", 9)),
                           c("F statistic", unlist(lapply(deathregsr, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(deathregsr, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))))
          )

## NO2

deathregsrno2 <- rep(list(NA), 8)
se_ivrno2 <- rep(list(NA), 8)
q <- 1

for (j in geogs) {
        for (r in c(0, 2000, 1000, 500)) {
                deathregsrno2[[q]] <- iv_estimation("cumulative_total_deaths", "no2_idw", j, r)
                se_ivrno2[[q]] <- sqrt(diag(vcovHC(deathregsrno2[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(deathregsrno2, function(x) x[[1]]), se = se_ivrno2, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("NO_{2}"),
          add.lines = list(c("AME", unlist(lapply(deathregsrno2, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(deathregsrno2, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 9)),
                           c("Geography", rep("All", 4), rep("Outer-Borough", 4)),
                           c("Max distance to closest highway", rep(c("All", "2km", "1km", "0.5km"), 3)),
                           c(rep("", 9)),
                           c("F statistic", unlist(lapply(deathregsrno2, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(deathregsrno2, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))))
          )

## NO

deathregsrno <- rep(list(NA), 8)
se_ivrno <- rep(list(NA), 8)
q <- 1

for (j in geogs) {
        for (r in c(0, 2000, 1000, 500)) {
                deathregsrno[[q]] <- iv_estimation("cumulative_total_deaths", "no_idw", j, r)
                se_ivrno[[q]] <- sqrt(diag(vcovHC(deathregsrno[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(deathregsrno, function(x) x[[1]]), se = se_ivrno, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("NO"),
          add.lines = list(c("AME", unlist(lapply(deathregsrno, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(deathregsrno, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 9)),
                           c("Geography", rep("All", 4), rep("Outer-Borough", 4)),
                           c("Max distance to closest highway", rep(c("All", "2km", "1km", "0.5km"), 3)),
                           c(rep("", 9)),
                           c("F statistic", unlist(lapply(deathregsrno, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(deathregsrno, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))))
          )

```





## Near Distance - Hospitalizations

```{r, results="asis"}
## PM

hospregsr <- rep(list(NA), 8)
se_ivrh <- rep(list(NA), 8)
q <- 1

for (j in geogs) {
        for (r in c(0, 2000, 1000, 500)) {
                hospregsr[[q]] <- iv_estimation("cumulative_hospitalized", "pm_idw", j, r)
                se_ivrh[[q]] <- sqrt(diag(vcovHC(hospregsr[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(hospregsr, function(x) x[[1]]), se = se_ivrh, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}"),
          add.lines = list(c("AME", unlist(lapply(hospregsr, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(hospregsr, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 9)),
                           c("Geography", rep("All", 4), rep("Outer-Borough", 4)),
                           c("Max distance to closest highway", rep(c("All", "2km", "1km", "0.5km"), 3)),
                           c(rep("", 9)),
                           c("F statistic", unlist(lapply(hospregsr, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(hospregsr, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))))
          )

## NO2

hospregsrno2 <- rep(list(NA), 8)
se_ivrno2h <- rep(list(NA), 8)
q <- 1

for (j in geogs) {
        for (r in c(0, 2000, 1000, 500)) {
                hospregsrno2[[q]] <- iv_estimation("cumulative_hospitalized", "no2_idw", j, r)
                se_ivrno2h[[q]] <- sqrt(diag(vcovHC(hospregsrno2[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(hospregsrno2, function(x) x[[1]]), se = se_ivrno2h, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("NO_{2}"),
          add.lines = list(c("AME", unlist(lapply(hospregsrno2, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(hospregsrno2, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 9)),
                           c("Geography", rep("All", 4), rep("Outer-Borough", 4)),
                           c("Max distance to closest highway", rep(c("All", "2km", "1km", "0.5km"), 3)),
                           c(rep("", 9)),
                           c("F statistic", unlist(lapply(hospregsrno2, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(hospregsrno2, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))))
          )

## NO

deathregsrnoh <- rep(list(NA), 8)
se_ivrnoh <- rep(list(NA), 8)
q <- 1

for (j in geogs) {
        for (r in c(0, 2000, 1000, 500)) {
                deathregsrnoh[[q]] <- iv_estimation("cumulative_hospitalized", "no_idw", j, r)
                se_ivrnoh[[q]] <- sqrt(diag(vcovHC(deathregsrnoh[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(deathregsrnoh, function(x) x[[1]]), se = se_ivrnoh, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("NO"),
          add.lines = list(c("AME", unlist(lapply(deathregsrnoh, function(x) round(x[[2]]$t0,2)))),
                           c("", unlist(lapply(deathregsrnoh, 
                                                        function(x) paste("(", round(x[[3]]$percent[4], 2), ", ", round(x[[3]]$percent[5], 2), ")", sep = "")))),
                           c(rep("", 9)),
                           c("Geography", rep("All", 4), rep("Outer-Borough", 4)),
                           c("Max distance to closest highway", rep(c("All", "2km", "1km", "0.5km"), 3)),
                           c(rep("", 9)),
                           c("F statistic", unlist(lapply(deathregsrnoh, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[1,3], 2)))),
                           c("Sargan p value", unlist(lapply(deathregsrnoh, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE, vcov. = sandwich)$diagnostics[3,4], 2)))))
          )

```


### Race Tables

```{r, results="asis"}
deathregs_race <- rep(list(NA), 12)
se_iv_race <- rep(list(NA), 12)
race_strat <- c("White", "Hispanic/Latino", "Black/African American", "Asian/Pacific Islander")

q <- 1
for (i in indvars[1:3]) {
        for (j in race_strat) {
                deathregs_race[[q]] <- iv_estimation("stratified_totaldeaths", i, "All", 500, race = j)
                se_iv_race[[q]] <- sqrt(diag(vcovHC(deathregs_race[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(deathregs_race, function(x) x[[1]]), se = se_iv_race, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Race/Ethnicity", rep(c("W", "H", "B", "A"), 3)),
                           c("F stat", unlist(lapply(deathregs_race, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[1,3], 2)))),
                           c("Sargan p val", unlist(lapply(deathregs_race, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[3,4], 2)))))
          )

hospregs_race <- rep(list(NA), 12)
se_iv_raceh <- rep(list(NA), 12)

q <- 1
for (i in indvars[1:3]) {
        for (j in race_strat) {
                hospregs_race[[q]] <- iv_estimation("stratified_hospitalized", i, "All", 500, race = j)
                se_iv_raceh[[q]] <- sqrt(diag(vcovHC(hospregs_race[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(hospregs_race, function(x) x[[1]]), se = se_iv_raceh, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Race/Ethnicity", rep(c("W", "H", "B", "A"), 3)),
                           c("F stat", unlist(lapply(hospregs_race, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[1,3], 2)))),
                           c("Sargan p val", unlist(lapply(hospregs_race, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[3,4], 2)))))
          )
```

### Race Tables - Interactions approach

```{r, results="asis"}
deathregs_race <- rep(list(NA), 3)
se_iv_race <- rep(list(NA), 3)
pvals <- rep(list(NA), 3)

# filter tracts where total population < 500 as before in main specifications
df <- filter(df.str_race, tot_population > 500 & near_dist >=50 & near_dist <= 500 &
               raceethnicity %in% c("White", "Hispanic/Latino", "Black/African American", "Asian/Pacific Islander"))
df$raceethnicity <- factor(df$raceethnicity, levels = c("White", "Hispanic/Latino", "Black/African American", "Asian/Pacific Islander"))
ivs_raw <- "downwind_0_r"
depvar = "stratified_totaldeaths"

q <- 1
for (i in indvars[1:3]) {
  formula <- as.formula(paste("log(", depvar, " + 1) ~", i, "+", i, ":raceethnicity + raceethnicity + near_dist + as.factor(puma) + 
                            as.factor(station) |", ivs_raw, "+", ivs_raw, ":raceethnicity + raceethnicity + near_dist + 
                            as.factor(puma) + as.factor(station)"))
  deathregs_race[[q]] <- ivreg(formula, data = df)
  test <- coeftest(deathregs_race[[q]], vcov = vcovHC(deathregs_race[[q]], type="HC1"))
  se_iv_race[[q]] <- test[, 2]
  pvals[[q]] <- test[, 4]
  q <- q+1
}

stargazer(deathregs_race, se = se_iv_race, p = pvals, type = "latex", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "Hispanic/Latino", "Black", "Asian/Pacific Islander", 
                               "PM_{2.5}:Hispanic/Latino", "PM_{2.5}:Black", "PM_{2.5}:Asian/Pacific Islander", 
                               "NO_{2}:Hispanic/Latino", "NO_{2}:Black", "NO_{2}:Asian/Pacific Islander", 
                               "NO:Hispanic/Latino", "NO:Black", "NO:Asian/Pacific Islander")
          )

# multiple hypotheses testing
padj <- p.adjust(c(pvals[[1]][c(2:5, (length(pvals[[1]])-2):length(pvals[[1]]))], pvals[[2]][c(2:5, (length(pvals[[2]])-2):length(pvals[[2]]))], pvals[[3]][c(2:5, (length(pvals[[3]])-2):length(pvals[[3]]))]), method = "BH")
print(padj)

depvar = "stratified_hospitalized"

q <- 1
for (i in indvars[1:3]) {
  formula <- as.formula(paste("log(", depvar, " + 1) ~", i, "+", i, ":raceethnicity + raceethnicity + near_dist + as.factor(puma) + 
                            as.factor(station) |", ivs_raw, "+", ivs_raw, ":raceethnicity + raceethnicity + near_dist + 
                            as.factor(puma) + as.factor(station)"))
  deathregs_race[[q]] <- ivreg(formula, data = df)
  test <- coeftest(deathregs_race[[q]], vcov = vcovHC(deathregs_race[[q]], type="HC1"))
  se_iv_race[[q]] <- test[, 2]
  pvals[[q]] <- test[, 4]
  q <- q+1
}

stargazer(deathregs_race, se = se_iv_race, p = pvals, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "Hispanic/Latino", "Black", "Asian/Pacific Islander", 
                               "PM_{2.5}:Hispanic/Latino", "PM_{2.5}:Black", "PM_{2.5}:Asian/Pacific Islander", 
                               "NO_{2}:Hispanic/Latino", "NO_{2}:Black", "NO_{2}:Asian/Pacific Islander", 
                               "NO:Hispanic/Latino", "NO:Black", "NO:Asian/Pacific Islander")
          )

# multiple hypotheses testing
padj <- p.adjust(c(pvals[[1]][c(2:5, (length(pvals[[1]])-2):length(pvals[[1]]))], pvals[[2]][c(2:5, (length(pvals[[2]])-2):length(pvals[[2]]))], pvals[[3]][c(2:5, (length(pvals[[3]])-2):length(pvals[[3]]))]), method = "BH")
print(padj)
```

### Age Tables

```{r, results="asis"}
deathregs_age <- rep(list(NA), 6)
se_iv_age <- rep(list(NA), 6)
age_strat <- c("18-64", "65up")

q <- 1
for (i in indvars[1:3]) {
        for (j in age_strat) {
                deathregs_age[[q]] <- iv_estimation("stratified_totaldeaths", i, "All", 500, age = j)
                se_iv_age[[q]] <- sqrt(diag(vcovHC(deathregs_age[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(deathregs_age, function(x) x[[1]]), se = se_iv_age, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Age", rep(c("18-64", "65+"), 3)),
                           c("F stat", unlist(lapply(deathregs_age, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[1,3], 2)))),
                           c("Sargan p val", unlist(lapply(deathregs_age, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[3,4], 2)))))
          )

hospregs_age <- rep(list(NA), 6)
se_iv_ageh <- rep(list(NA), 6)

q <- 1
for (i in indvars[1:3]) {
        for (j in age_strat) {
                hospregs_age[[q]] <- iv_estimation("stratified_hospitalized", i, "All", 500, age = j)
                se_iv_ageh[[q]] <- sqrt(diag(vcovHC(hospregs_age[[q]][[1]], type = "HC1")))
                q <- q+1
        }
}

stargazer(lapply(hospregs_age, function(x) x[[1]]), se = se_iv_ageh, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Age", rep(c("18-64", "65+"), 3)),
                           c("F stat", unlist(lapply(hospregs_age, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[1,3], 2)))),
                           c("Sargan p val", unlist(lapply(hospregs_age, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[3,4], 2)))))
          )
```

### Age Tables - Interactions approach

```{r, results="asis"}
deathregs_age <- rep(list(NA), 3)
se_iv_age <- rep(list(NA), 3)
pvals <- rep(list(NA), 3)

df <- filter(df.str_age, tot_population > 500 & near_dist >=50 & near_dist <= 500)
ivs_raw <- "downwind_0_r"
depvar = "stratified_totaldeaths"

q <- 1
for (i in indvars[1:3]) {
  formula <- as.formula(paste("log(", depvar, " + 1) ~", i, "+", i, ":agegroup + agegroup + near_dist + as.factor(puma) + 
                            as.factor(station) |", ivs_raw, "+", ivs_raw, ":agegroup + agegroup + near_dist + 
                            as.factor(puma) + as.factor(station)"))
  deathregs_age[[q]] <- ivreg(formula, data = df)
  test <- coeftest(deathregs_age[[q]], vcov = vcovHC(deathregs_age[[q]], type="HC1"))
  se_iv_race[[q]] <- test[, 2]
  pvals[[q]] <- test[, 4]
  q <- q+1
}

stargazer(deathregs_age, se = se_iv_race, p = pvals, type = "latex", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "age 18-64", "age 65+", "PM_{2.5}:age 18-64", "PM_{2.5}:age 65+", 
                               "NO_{2}:age 18-64", "NO_{2}:age 65+", "NO:age 18-64", "NO:age 65+")
          )

# multiple hypotheses testing
padj <- p.adjust(c(pvals[[1]][c(2:4, (length(pvals[[1]])-1):length(pvals[[1]]))], pvals[[2]][c(2:4, (length(pvals[[2]])-1):length(pvals[[2]]))], pvals[[3]][c(2:4, (length(pvals[[3]])-1):length(pvals[[3]]))]), method = "BH")
print(padj)

depvar = "stratified_hospitalized"

q <- 1
for (i in indvars[1:3]) {
  formula <- as.formula(paste("log(", depvar, " + 1) ~", i, "+", i, ":agegroup + agegroup + near_dist + as.factor(puma) + 
                            as.factor(station) |", ivs_raw, "+", ivs_raw, ":agegroup + agegroup + near_dist + 
                            as.factor(puma) + as.factor(station)"))
  deathregs_age[[q]] <- ivreg(formula, data = df)
  test <- coeftest(deathregs_age[[q]], vcov = vcovHC(deathregs_age[[q]], type="HC1"))
  se_iv_race[[q]] <- test[, 2]
  pvals[[q]] <- test[, 4]
  q <- q+1
}

stargazer(deathregs_age, se = se_iv_race, p = pvals, type = "latex", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "age 18-64", "age 65+", "PM_{2.5}:age 18-64", "PM_{2.5}:age 65+", 
                               "NO_{2}:age 18-64", "NO_{2}:age 65+", "NO:age 18-64", "NO:age 65+")
          )

# multiple hypotheses testing
padj <- p.adjust(c(pvals[[1]][c(2:4, (length(pvals[[1]])-1):length(pvals[[1]]))], pvals[[2]][c(2:4, (length(pvals[[2]])-1):length(pvals[[2]]))], pvals[[3]][c(2:4, (length(pvals[[3]])-1):length(pvals[[3]]))]), method = "BH")
print(padj)
```

### Age and Race
# note F stats are not robust - including vcov. = sandwich throws an error because not all puma FEs can be estimated

```{r, results="asis"}
deathregs_strat <- rep(list(NA), 24)
se_iv_strat <- rep(list(NA), 24)

q <- 1
for (i in indvars[1:3]) {
        for (j in race_strat) {
                for (k in age_strat) {
                        deathregs_strat[[q]] <- iv_estimation("stratified_totaldeaths", i, "All", 500, age = k, race = j)
                        se_iv_strat[[q]] <- sqrt(diag(vcovHC(deathregs_strat[[q]][[1]], type = "HC1")))
                        q <- q+1       
                }
        }
}

stargazer(lapply(deathregs_strat, function(x) x[[1]]), se = se_iv_strat, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Age", rep(c("18-64", "65+"), 12)),
                           c("Race", rep(c(rep("W", 2), rep("H", 2), rep("B", 2), rep("A", 2)), 3)),
                           c("F stat", unlist(lapply(deathregs_strat, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[1,3], 2)))),
                           c("Sargan p val", unlist(lapply(deathregs_strat, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[3,4], 2)))))
          )

hospregs_strat <- rep(list(NA), 24)
se_iv_strath <- rep(list(NA), 24)

q <- 1
for (i in indvars[1:3]) {
        for (j in race_strat) {
                for (k in age_strat) {
                        hospregs_strat[[q]] <- iv_estimation("stratified_hospitalized", i, "All", 500, age = k, race = j)
                        se_iv_strath[[q]] <- sqrt(diag(vcovHC(hospregs_strat[[q]][[1]], type = "HC1")))
                        q <- q+1       
                }
        }
}

stargazer(lapply(hospregs_strat, function(x) x[[1]]), se = se_iv_strath, type = "text", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Age", rep(c("18-64", "65+"), 12)),
                           c("Race", rep(c(rep("W", 2), rep("H", 2), rep("B", 2), rep("A", 2)), 3)),
                           c("F stat", unlist(lapply(hospregs_strat, 
                                                          function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[1,3], 2)))),
                           c("Sargan p val", unlist(lapply(hospregs_strat, 
                                                             function(x) round(summary(x[[1]], diagnostics = TRUE)$diagnostics[3,4], 2)))))
          )
```

### Age and Race Tables - Interactions approach

```{r, results="asis"}
deathregs_age <- rep(list(NA), 3)
se_iv_age <- rep(list(NA), 3)
pvals <- rep(list(NA), 3)

df <- filter(df.str, tot_population > 500 & near_dist >=50 & near_dist <= 500  &
               raceethnicity %in% c("White", "Hispanic/Latino", "Black/African American", "Asian/Pacific Islander")) %>%
  mutate(demo_group = paste(agegroup, raceethnicity, sep = "_"))
ivs_raw <- "downwind_0_r"
depvar = "stratified_totaldeaths"

q <- 1
for (i in indvars[1:3]) {
  formula <- as.formula(paste("log(", depvar, " + 1) ~", i, "+", i, ":demo_group + demo_group + near_dist + as.factor(puma) + 
                            as.factor(station) |", ivs_raw, "+", ivs_raw, ":demo_group + demo_group + near_dist + 
                            as.factor(puma) + as.factor(station)"))
  deathregs_age[[q]] <- ivreg(formula, data = df)
  test <- coeftest(deathregs_age[[q]], vcov = vcovHC(deathregs_age[[q]], type="HC1"))
  se_iv_race[[q]] <- test[, 2]
  pvals[[q]] <- test[, 4]
  q <- q+1
}

stargazer(deathregs_age, se = se_iv_race, p = pvals, type = "latex", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO")
          )

# multiple hypotheses testing
padj <- p.adjust(c(pvals[[1]][c(2:13, (length(pvals[[1]])-10):length(pvals[[1]]))], pvals[[2]][c(2:13, (length(pvals[[2]])-10):length(pvals[[2]]))], pvals[[3]][c(2:13, (length(pvals[[3]])-10):length(pvals[[3]]))]), method = "BH")
print(padj)


depvar = "stratified_hospitalized"

q <- 1
for (i in indvars[1:3]) {
  formula <- as.formula(paste("log(", depvar, " + 1) ~", i, "+", i, ":demo_group + demo_group + near_dist + as.factor(puma) + 
                            as.factor(station) |", ivs_raw, "+", ivs_raw, ":demo_group + demo_group + near_dist + 
                            as.factor(puma) + as.factor(station)"))
  deathregs_age[[q]] <- ivreg(formula, data = df)
  test <- coeftest(deathregs_age[[q]], vcov = vcovHC(deathregs_age[[q]], type="HC1"))
  se_iv_race[[q]] <- test[, 2]
  pvals[[q]] <- test[, 4]
  q <- q+1
}

stargazer(deathregs_age, se = se_iv_race, p = pvals, type = "latex", style = "QJE", df = FALSE, dep.var.labels = "Hospitalizations - log-linear",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO")
          )

# multiple hypotheses testing
padj <- p.adjust(c(pvals[[1]][c(2:13, (length(pvals[[1]])-10):length(pvals[[1]]))], pvals[[2]][c(2:13, (length(pvals[[2]])-10):length(pvals[[2]]))], pvals[[3]][c(2:13, (length(pvals[[3]])-10):length(pvals[[3]]))]), method = "BH")
print(padj)
```

## First stage - stratified race regs

```{r, results="asis"}
dow1stg0pmrw <- lm(pm_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="White")

dow1stg0pmrb <- lm(pm_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Black/African American")

dow1stg0pmrh <- lm(pm_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Hispanic/Latino")

dow1stg0pmra <- lm(pm_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Asian/Pacific Islander")

dow1stg0norw <- lm(no_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="White")

dow1stg0norb <- lm(no_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Black/African American")

dow1stg0norh <- lm(no_idw ~ downwind_1_r + downwind_0_r  + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Hispanic/Latino")

dow1stg0nora <- lm(no_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Asian/Pacific Islander")

dow1stg0no2rw <- lm(no2_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="White")

dow1stg0no2rb <- lm(no2_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Black/African American")

dow1stg0no2rh <- lm(no2_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Hispanic/Latino")

dow1stg0no2ra <- lm(no2_idw ~ downwind_1_r + downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.str_race, subset = population > 50 & intersect_ctr==0 & near_dist < 1000 & below_110th==0 & raceethnicity=="Asian/Pacific Islander")

firststgr <- list(dow1stg0pmrw, dow1stg0pmrb, dow1stg0pmrh, dow1stg0pmra)
firststgrNO <- list(dow1stg0norw, dow1stg0norb, dow1stg0norh, dow1stg0nora)
firststgrNO2 <- list(dow1stg0no2rw, dow1stg0no2rb, dow1stg0no2rh, dow1stg0no2ra)

stargazer(firststgr, type = "text", style = "QJE", df = FALSE, dep.var.labels = c("PM_{2.5}"),
          omit = c("puma", "station", "near_dist"), omit.stat = c("ser"), keep.stat = c("n"), digits = 2,
          se = lapply(firststgr, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          add.lines = list(c(rep("", 4)),
                           c("Geography", rep("Outer Borough", 4)),
                           c("Maximum Distance from Highway", rep("1 km", 4)),
                           c("Population", rep("50", 4)),
                           c("Race", "White", "Black/African American", "Hispanic/Latino", "Asian/Pacific Islander"),
                           c(rep("", 4)))
          )

stargazer(firststgrNO, type = "text", style = "QJE", df = FALSE, dep.var.labels = c("NO"),
          omit = c("puma", "station", "near_dist"), omit.stat = c("ser"), keep.stat = c("n"), digits = 2,
          se = lapply(firststgrNO, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          add.lines = list(c(rep("", 4)),
                           c("Geography", rep("Outer Borough", 4)),
                           c("Maximum Distance from Highway", rep("1 km", 4)),
                           c("Population", rep("50", 4)),
                           c("Race", "White", "Black/African American", "Hispanic/Latino", "Asian/Pacific Islander"),
                           c(rep("", 4)))
          )

stargazer(firststgrNO2, type = "text", style = "QJE", df = FALSE, dep.var.labels = c("NO_{2}"),
          omit = c("puma", "station", "near_dist"), omit.stat = c("ser"), keep.stat = c("n"), digits = 2,
          se = lapply(firststgrNO2, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          add.lines = list(c(rep("", 4)),
                           c("Geography", rep("Outer Borough", 4)),
                           c("Maximum Distance from Highway", rep("1 km", 4)),
                           c("Population", rep("50", 4)),
                           c("Race", "White", "Black/African American", "Hispanic/Latino", "Asian/Pacific Islander"),
                           c(rep("", 4)))
          )
```

### Log linear predictions

```{r, results="asis"}
dfat <- filter(df.master, tot_population > 500 & near_dist>=50 & near_dist < 500)

dy.dx.ame.at <- function(ind_var, depvar, at, data, indices){
        dfi <- data[indices, ]
        naiveregfep <- ivreg(as.formula(paste("log(", depvar, "+ 1) ~", ind_var, "+ near_dist + as.factor(puma) + as.factor(station) |
                                 downwind_1_r + downwind_0_r + near_dist + as.factor(puma) + as.factor(station)")),
                data = dfi)
        b1 <- naiveregfep$coefficients[2]
        ehats <- naiveregfep$residuals
        dat2 <- dfi
        dat2[, ind_var] <- at
        preds <- predict(naiveregfep, newdata = dat2)
        return(mean(exp(preds)*mean(exp(ehats)) - 1))
}

cpm <- seq(7, 10.5, by = .5)
cno <- seq(16, 23, by = 1)
cno2 <- seq(16, 23, by = 1)

preds_pm <- rep(list(NA), 8)
preds_pm_h <- rep(list(NA), 8)

for (i in c(1:8)) {
        a <- boot(data = dfat, statistic = dy.dx.ame.at, R = 1000, ind_var = "pm_idw", depvar = "cumulative_total_deaths", at = cpm[i], parallel = "multicore", ncpus = 5)
        preds_pm[[i]] <- boot.ci(a, type = "perc")
        b <- boot(data = dfat, statistic = dy.dx.ame.at, R = 1000, ind_var = "pm_idw", depvar = "cumulative_hospitalized", at = cpm[i], parallel = "multicore", ncpus = 5)
        preds_pm_h[[i]] <- boot.ci(b, type = "perc")
}

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Deaths at Different Concentrations", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_pm, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_pm, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("PM_{2.5}"),
          add.lines = list(c("Concentration", cpm))
          )

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Hospitalizations at Different Concentrations", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_pm_h, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_pm_h, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("PM_{2.5}"),
          add.lines = list(c("Concentration", cpm))
          )

preds_no2 <- rep(list(NA), 8)
preds_no2_h <- rep(list(NA), 8)
q <- 1
for (i in cno2) {
        a <- boot(data = dfat, statistic = dy.dx.ame.at, R = 1000, ind_var = "no2_idw", depvar = "cumulative_total_deaths", at = cno2[q], parallel = "multicore", ncpus = 5)
        preds_no2[[q]] <- boot.ci(a, type = "perc")
        b <- boot(data = dfat, statistic = dy.dx.ame.at, R = 1000, ind_var = "no2_idw", depvar = "cumulative_hospitalized", at = cno2[q], parallel = "multicore", ncpus = 5)
        preds_no2_h[[q]] <- boot.ci(b, type = "perc")
        q <- q+1
}

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Deaths at Different Concentrations", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no2, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no2, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO_{2}"),
          add.lines = list(c("Concentration", cno2))
          )

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Hospitalizations at Different Concentrations", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no2_h, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no2_h, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO_{2}"),
          add.lines = list(c("Concentration", cno2))
          )

preds_no <- rep(list(NA), 8)
preds_no_h <- rep(list(NA), 8)
q <- 1
for (i in cno) {
        a <- boot(data = dfat, statistic = dy.dx.ame.at, R = 1000, ind_var = "no_idw", depvar = "cumulative_total_deaths", at = cno[q], parallel = "multicore", ncpus = 5)
        preds_no[[q]] <- boot.ci(a, type = "perc")
        b <- boot(data = dfat, statistic = dy.dx.ame.at, R = 1000, ind_var = "no_idw", depvar = "cumulative_hospitalized", at = cno[q], parallel = "multicore", ncpus = 5)
        preds_no_h[[q]] <- boot.ci(b, type = "perc")
        q <- q+1
}

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Deaths at Different Concentrations", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO"),
          add.lines = list(c("Concentration", cno))
          )

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Hospitalizations at Different Concentrations", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no_h, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no_h, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO"),
          add.lines = list(c("Concentration", cno))
          )

```

### Poisson Predictions

```{r, results="asis", message=FALSE}
poismfx.at <- function(ind_var, depvar, at, data, indices){
        dfi <- data[indices, ]
        npstg1 <- lm(as.formula(paste(ind_var, "~ downwind_1_r + downwind_0_r + near_dist + 
                                             as.factor(puma) + as.factor(station)")),
                            data = dfi)
        dfi$stg1e <- npstg1$residuals
        poisreg <- fepois(as.formula(paste(depvar, "~", ind_var, "+ stg1e + near_dist + as.factor(station) | as.factor(puma)")),
                            data = dfi)
        dat2 <- dfi
        dat2[, ind_var] <- at
        preds <- predict(poisreg, newdata = dat2)
        return(mean(preds))
}

preds_pm_p <- rep(list(NA), 8)
preds_pm_h_p <- rep(list(NA), 8)
q <- 1
for (i in cpm) {
        a <- boot(data = dfat, statistic = poismfx.at, R = 1000, ind_var = "pm_idw", depvar = "cumulative_total_deaths", at = cpm[q], parallel = "multicore", ncpus = 5)
        preds_pm_p[[q]] <- boot.ci(a, type = "perc")
        b <- boot(data = dfat, statistic = poismfx.at, R = 1000, ind_var = "pm_idw", depvar = "cumulative_hospitalized", at = cpm[q], parallel = "multicore", ncpus = 5)
        preds_pm_h_p[[q]] <- boot.ci(b, type = "perc")
        q <- q+1
}

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Deaths at Different Concentrations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_pm_p, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_pm_p, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("PM_{2.5}"),
          add.lines = list(c("Concentration", cpm))
          )

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Hospitalizations at Different Concentrations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_pm_h_p, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_pm_h_p, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("PM_{2.5}"),
          add.lines = list(c("Concentration", cpm))
          )

preds_no2_p <- rep(list(NA), 8)
preds_no2_h_p <- rep(list(NA), 8)
q <- 1
for (i in cno2) {
        a <- boot(data = dfat, statistic = poismfx.at, R = 1000, ind_var = "no2_idw", depvar = "cumulative_total_deaths", at = cno2[q], parallel = "multicore", ncpus = 5)
        preds_no2_p[[q]] <- boot.ci(a, type = "perc")
        b <- boot(data = dfat, statistic = poismfx.at, R = 1000, ind_var = "no2_idw", depvar = "cumulative_hospitalized", at = cno2[q], parallel = "multicore", ncpus = 5)
        preds_no2_h_p[[q]] <- boot.ci(b, type = "perc")
        q <- q+1
}

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Deaths at Different Concentrations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no2_p, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no2_p, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO_{2}"),
          add.lines = list(c("Concentration", cno2))
          )

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Hospitalizations at Different Concentrations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no2_h_p, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no2_h_p, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO_{2}"),
          add.lines = list(c("Concentration", cno2))
          )

preds_no_p <- rep(list(NA), 8)
preds_no_h_p <- rep(list(NA), 8)
q <- 1
for (i in cpm) {
        a <- boot(data = dfat, statistic = poismfx.at, R = 1000, ind_var = "no_idw", depvar = "cumulative_total_deaths", at = cno[q], parallel = "multicore", ncpus = 5)
        preds_no_p[[q]] <- boot.ci(a, type = "perc")
        a <- boot(data = dfat, statistic = poismfx.at, R = 1000, ind_var = "no_idw", depvar = "cumulative_hospitalized", at = cno[q], parallel = "multicore", ncpus = 5)
        preds_no_h_p[[q]] <- boot.ci(a, type = "perc")
        q <- q+1
}

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Deaths at Different Concentrations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no_p, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no_p, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO"),
          add.lines = list(c("Concentration", cno))
          )

stargazer(rep(hospregs_o[c(1:2)], 4), ci = TRUE, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Predicted Hospitalizations at Different Concentrations - Poisson", star.cutoffs = NA, omit.table.layout = "n",
          coef = lapply(preds_no_h_p, function(x) c(1, x$t0, rep(1, 57))),
          ci.custom = lapply(preds_no_h_p, function(x) cbind(c(0, x$percent[4], rep(0,57)),
                                                            c(1, x$percent[5], rep(1,57)))),                                                          
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = "all", digits = 2,
          covariate.labels = c("NO"),
          add.lines = list(c("Concentration", cno))
          )
```

### Robustness Checks

```{r, results="asis"}
controls <- " + log(tot_population) + log(tot_inc_percap) + tot_owner_occupied + tot_public_assistance + tot_rent2income50percent +
                    tot_ed_hs + tot_ed_none + black + hispanic + 
                    tot_age18_44 + tot_age45_54 + tot_age55_64 + tot_age65_74 + tot_age75up + 
                    device_chg"

extracontrols <- "+ median_tenure + tot_institutionalized + tot_service_occupation + 
                tot_uninsured + tot_public_transport"

robustregs <- rep(list(NA), 6)
r_ses <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                robustregs[[q]] <- robustimations("cumulative_total_deaths", i, j, 500, controls, ql = .05, qh = .05)
                r_ses[[q]] <- lapply(robustregs[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(robustregs, function(x) x[[1]]), se = lapply(r_ses, function(x) x[[1]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - Population cutoff 100",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)))
          )

stargazer(lapply(robustregs, function(x) x[[2]]), se = lapply(r_ses, function(x) x[[2]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - top and bottom 5% windsorized by tract population",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)))
          )

stargazer(lapply(robustregs, function(x) x[[3]]), se = lapply(r_ses, function(x) x[[3]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - Tracts between 200 and 2000 m from highway",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)))
          )

stargazer(lapply(robustregs, function(x) x[[5]]), se = lapply(r_ses, function(x) x[[5]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - Pollution measured at Closest Monitor",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)))
          )

stargazer(lapply(robustregs, function(x) x[[4]]), se = lapply(r_ses, function(x) x[[4]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - Demographic Controls",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO", "Log Tract Population", "Log Income Per Capita", 
                               "% Owner Occupied", "% Received Public Assistance", "% Rent-Income > 50 %",
                               "% Education: High School", "% Education: less than high school",
                               "% Black/African American", "% Hispanic/Latino", "% Age 18-44", "% Age 45-54",
                               "% Age 55-64", "% Age 65-74", "% Age 75+", "Safegraph Device Change"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)))
          )

robustregs2 <- rep(list(NA), 6)
r_ses2 <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                robustregs2[[q]] <- robustimations("cumulative_total_deaths", i, j, 500, 
                                                   controls = paste(controls, extracontrols), ql = .05, qh = .05)
                r_ses2[[q]] <- lapply(robustregs2[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(robustregs2, function(x) x[[4]]), se = lapply(r_ses2, function(x) x[[4]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - Additional Angles",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)))
          )





```

### Rate based depvar

```{r, results="asis"}
ratereg_iv <- rep(list(NA), 6)
se_riv <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                ratereg_iv[[q]] <- iv_estimation("death_rate", i, j, 500)
                se_riv[[q]] <- sqrt(diag(vcovHC(ratereg_iv[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(ratereg_iv, se = se_riv, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - IV - Deaths/100,000",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

ratereg_iv <- rep(list(NA), 6)
se_riv <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                ratereg_iv[[q]] <- iv_estimation("adjdeathrate", i, j, 500)
                se_riv[[q]] <- sqrt(diag(vcovHC(ratereg_iv[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(ratereg_iv, se = se_riv, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - IV - Adjusted Deaths/100,000",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

ratereg_ols <- rep(list(NA), 6)
se_rols <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                ratereg_ols[[q]] <- ols_estimation_w_cont("death_rate", i, j, 500, controls = "")
                se_rols[[q]] <- sqrt(diag(vcovHC(ratereg_ols[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(ratereg_ols, se = se_rols, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - OLS - Deaths/100,000",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

ratereg_olsc <- rep(list(NA), 6)
se_rolsc <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                ratereg_olsc[[q]] <- ols_estimation_w_cont("death_rate", i, j, 500, controls = controls)
                se_rolsc[[q]] <- sqrt(diag(vcovHC(ratereg_olsc[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(ratereg_olsc, se = se_rolsc, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - OLS - Deaths/100,000",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

ratereg_olscw <- rep(list(NA), 6)
se_rolscw <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                ratereg_olscw[[q]] <- ols_estimation_w_cont("death_rate", i, j, 500, controls = controls, weights = "tot")
                se_rolscw[[q]] <- sqrt(diag(vcovHC(ratereg_olscw[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(ratereg_olscw, se = se_rolscw, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - OLS - Deaths/100,000",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

ratereg_adjols <- rep(list(NA), 6)
se_adjols <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                ratereg_adjols[[q]] <- ols_estimation_w_cont("adjdeathrate", i, j, 500, controls = controls, weights = "adj")
                se_adjols[[q]] <- sqrt(diag(vcovHC(ratereg_adjols[[q]], type = "HC1")))
                q <- q+1
        }
}

stargazer(ratereg_adjols, se = se_adjols, type = "text", style = "QJE", df = FALSE, 
          dep.var.labels = "Deaths - OLS - Adjusted Deaths/100,000",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c(rep("", 7)),
                           c("Geography", rep(c("Citywide", "Outer Boroughs"), 3)),
                           c(rep("", 7)))
          )

```

### Diff population threshold cutoffs
(not in paper)

```{r, results="asis"}
popregs <- rep(list(NA), 6)
p_ses <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                popregs[[q]] <- robustimations("cumulative_total_deaths", i, j, 1000, controls, ql = .05, qh = 0)
                p_ses[[q]] <- lapply(popregs[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(popregs, function(x) x[[2]]), se = lapply(p_ses, function(x) x[[2]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - bottom 5% windsorized",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3))))

popregs <- rep(list(NA), 6)
p_ses <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                popregs[[q]] <- robustimations("cumulative_total_deaths", i, j, 1000, controls, ql = .1, qh = 0)
                p_ses[[q]] <- lapply(popregs[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(popregs, function(x) x[[2]]), se = lapply(p_ses, function(x) x[[2]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - bottom 10% windsorized",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3))))

popregs <- rep(list(NA), 6)
p_ses <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                popregs[[q]] <- robustimations("cumulative_total_deaths", i, j, 1000, controls, ql = .25, qh = 0)
                p_ses[[q]] <- lapply(popregs[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(popregs, function(x) x[[2]]), se = lapply(p_ses, function(x) x[[2]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - bottom 25% windsorized",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3))))

popregs <- rep(list(NA), 6)
p_ses <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                popregs[[q]] <- robustimations("cumulative_total_deaths", i, j, 1000, controls, ql = .33, qh = 0)
                p_ses[[q]] <- lapply(popregs[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(popregs, function(x) x[[2]]), se = lapply(p_ses, function(x) x[[2]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - bottom 33% windsorized",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3))))


popregs <- rep(list(NA), 6)
p_ses <- rep(list(NA), 6)
q <- 1
for (i in indvars[1:3]) {
        for (j in geogs) {
                popregs[[q]] <- robustimations("cumulative_total_deaths", i, j, 1000, controls, ql = .5, qh = 0)
                p_ses[[q]] <- lapply(popregs[[q]], function(x) sqrt(diag(vcovHC(x, type = "HC1"))))
                q <- q + 1
        }
}

stargazer(lapply(popregs, function(x) x[[2]]), se = lapply(p_ses, function(x) x[[2]]), 
          type = "text", style = "QJE", df = FALSE, dep.var.labels = "Deaths - log-linear - bottom 50% windsorized",
          omit = c("puma", "station", "near_dist", "Constant"), omit.stat = c("f", "rsq", "ser"), digits = 2,
          covariate.labels = c("PM_{2.5}", "NO_{2}", "NO"),
          add.lines = list(c("Geography", rep(c("Citywide", "Outer Boroughs"), 3))))
```

### Quantile Regression
(paper version run in stata)

```{r, results="asis"}
qregiv <- function(data, ind_var, depvar, ivs_raw, t, indices){
        dfi <- data[indices,]
        qstg1 <- lm(as.formula(paste(ind_var, "~", ivs_raw, "+ near_dist + as.factor(puma) + as.factor(station)")),
                    data = dfi)
        dfi$resid <- qstg1$residuals
        qreg <- rq(as.formula(paste("log(", depvar, "+ 1) ~", ind_var, "+ resid + near_dist + as.factor(puma) + as.factor(station)")),
                                    tau = t, data = dfi)
        return(as.numeric(coef(qreg)[2]))
}

qregboot <- function(t, geography, restriction, depvar, ind_var) {
        cdfq <- createdf(geography, restriction)
        dfq <- cdfq[[1]] %>%
                filter(intersect_ctr==0 & tot_population > 500)
        ivs_raw_q <- cdfq[[2]]
        qboot <- boot(data = dfq, statistic = qregiv, R = 1000, depvar = depvar, ind_var = ind_var, ivs_raw = ivs_raw_q, t=t, 
              parallel = "multicore", ncpus = 6)
        qbootci <- boot.ci(qboot, type = "norm")
        return(qbootci)
}

taus <- seq(.1, .9, by = .1)
p <- list()
q <- 1
for (i in c("pm_idw", "no2_idw", "no_idw")) {
        for (j in geogs) {
                bootobjs <- lapply(taus, qregboot, geography = j, restriction = 1000, depvar = "cumulative_total_deaths", ind_var = i)
                qdfplt <- data.frame("tau" = taus,
                                    "coef" = unlist(lapply(bootobjs, function(x) x$t0)),
                                    "ymin" = unlist(lapply(bootobjs, function(x) x$normal[2])),
                                    "ymax" = unlist(lapply(bootobjs, function(x) x$normal[3])))
                p[[q]] <- ggplot(qdfplt, aes(x = tau, y = coef)) + 
                        geom_line() +
                        geom_pointrange(aes(ymin = ymin, ymax = ymax)) + theme_light() +
                        labs(title = i, x = "quantile", y = "coefficient", subtitle = j)
                q <- q+1
        }
}

p <- lapply(p, print)

```

Descriptive quantile results

```{r, results="asis"}
df.sample <- filter(df.master, tot_population > 500 & intersect_ctr==0 & near_dist<1000 & below_110th==0)

quantile(df.sample$cumulative_total_deaths, seq(.1, .9, .1))

mean(df.sample$black)
mean(df.sample$black[df.sample$cumulative_total_deaths > 20])
mean(df.sample$hispanic)
mean(df.sample$hispanic[df.sample$cumulative_total_deaths > 20])
mean(df.sample$tot_inc_percap, na.rm = TRUE)
mean(df.sample$tot_inc_percap[df.sample$cumulative_total_deaths > 20])
```

### Correlations with observables
(not in paper)

```{r, results="asis"}
popreg1 <- lm(tot_population ~ downwind_0_r + 
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

popreg2 <- lm(tot_population ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

blackreg1 <- lm(black ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

blackreg2 <- lm(black ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

whitereg1 <- lm(white ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

whitereg2 <- lm(white ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

hispreg1 <- lm(hispanic ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

hispreg2 <- lm(hispanic ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

agereg1 <- lm(tot_age75up ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

agereg2 <- lm(tot_age75up ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

agereg3 <- lm(tot_age65_74 ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

agereg4 <- lm(tot_age65_74 ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

occreg2 <- lm(tot_service_occupation ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

occreg1 <- lm(tot_service_occupation ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

edreg2 <- lm(tot_ed_bach ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

edreg1 <- lm(tot_ed_bach ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

increg1 <- lm(tot_inc_percap ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

increg2 <- lm(tot_inc_percap ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

devreg1 <- lm(device_chg1020 ~ downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 500 & below_110th==0)

devreg2 <- lm(device_chg1020 ~ downwind_1_r + downwind_0_r +
                      as.factor(puma) + as.factor(station) + near_dist,
                         data = df.master, subset = tot_population > 500 & intersect_ctr==0 & near_dist < 1000 & below_110th==0)

balregs1 <- list(popreg1, blackreg1, whitereg1, hispreg1, agereg1, agereg3, occreg1, edreg1, increg1, devreg1)
balregs2 <- list(popreg2, blackreg2, whitereg2, hispreg2, agereg2, agereg4, occreg2, edreg2, increg2, devreg2)

stargazer(balregs1, se = lapply(balregs1, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
          type = "text", omit = c("puma", "station", "near_dist"), df = FALSE,
          add.lines = list(c("Geography", rep("OB", 10)),
                           c("Restriction", rep("500", 10))))

stargazer(balregs2, se = lapply(balregs2, function(x) sqrt(diag(vcovHC(x, type = "HC1")))), 
          type = "text", omit = c("puma", "station", "near_dist"), df = FALSE,
          add.lines = list(c("Geography", rep("OB", 10)),
                           c("Restriction", rep("1000", 10))))

```

### Summary Stats table

```{r, results="asis"}
df.sumstats <- df.master %>%
        mutate(distance_km = near_dist/1000) %>%
        filter(intersect_ctr==0 & tot_population > 500) %>%
        select(tot_population, downwind_5_r, downwind_3_r, downwind_2_r, downwind_1_r, downwind_0_r, distance_km,
               pm_idw, no2_idw, no_idw, tot_inc_percap, device_chg1020, cumulative_total_deaths, cumulative_hospitalized, cumulative_cases)

stargazer(data.frame(df.sumstats), type = "text", digits = 2, 
          covariate.labels = c("Tract Population", "Downwind 3-5km", "Downwind 2-3km", "Downwind 1-2km", "Downwind 0.5-1km", "Downwind < 0.5km", "Distance to Closest Highway",
                           "PM_{2.5}", "NO_{2}", "NO", "Per Capita Income", "Change in 'Home' Devices (March 8 to May 11)",  
                           "COVID-19 Deaths", "COVID-19 Hospitalizations", "COVID-19 Cases")
          )

df.sumstats_ob <- df.master %>% 
        mutate(distance_km = near_dist/1000) %>%
        filter(intersect_ctr==0 & near_dist < 1000 & tot_population > 500 &  below_110th==0) %>%
        select(tot_population, downwind_5_r, downwind_3_r, downwind_2_r, downwind_1_r, downwind_0_r, distance_km,
               pm_idw, no2_idw, no_idw, tot_inc_percap, device_chg1020, cumulative_total_deaths, cumulative_hospitalized, cumulative_cases)

stargazer(data.frame(df.sumstats_ob), type = "text", digits = 2, 
          covariate.labels = c("Tract Population", "Downwind 3-5km", "Downwind 2-3km", "Downwind 1-2km", "Downwind 0.5-1km", "Downwind < 0.5km", "Distance to Closest Highway",
                           "PM_{2.5}", "NO_{2}", "NO", "Per Capita Income", "Change in 'Home' Devices (March 8 to May 11)",  
                           "COVID-19 Deaths", "COVID-19 Hospitalizations", "COVID-19 Cases")
          )

```

```{r, results="asis"}
df.sumstats_ob_race <- df.str_race %>% 
        mutate(distance_km = near_dist/1000) %>%
        filter(intersect_ctr==0 & near_dist < 1000 & population > 150 & below_110th==0 & raceethnicity %in% race_strat) %>%
        select(population, raceethnicity, tract, downwind_5_r, downwind_3_r, downwind_2_r, downwind_1_r, downwind_0_r, distance_km,
               pm_idw, no2_idw, no_idw, tot_inc_percap, device_chg1020, stratified_totaldeaths, stratified_hospitalized, stratified_cases) %>%
        pivot_longer(-c(raceethnicity, tract), names_to = "var", values_to = "value") %>%
        mutate(race_var = paste(raceethnicity, var, sep = "_"),
               tract_race = paste(tract, raceethnicity, sep = "_")) %>%
        select(value, race_var, tract_race) %>%
        pivot_wider(names_from = "race_var", values_from = "value")

stargazer(data.frame(df.sumstats_ob_race), type = "text", digits = 2
          )

```


